If you don’t already have them installed, you need to download and install R and RStudio.
install.packages("tidyverse") # only needs to be done once per machine
library(tidyverse) # needs to be loaded in every session where you want to use it (usually every time you open RStudio)
#
# will be ignored
?function_name
??term
library(tidyverse)
mpg # dataset built into ggplot2
# A tibble: 234 x 11
manufacturer model displ year cyl trans drv cty hwy
<chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int>
1 audi a4 1.8 1999 4 auto(l5) f 18 29
2 audi a4 1.8 1999 4 manual(m5) f 21 29
3 audi a4 2.0 2008 4 manual(m6) f 20 31
4 audi a4 2.0 2008 4 auto(av) f 21 30
5 audi a4 2.8 1999 6 auto(l5) f 16 26
6 audi a4 2.8 1999 6 manual(m5) f 18 26
7 audi a4 3.1 2008 6 auto(av) f 18 27
8 audi a4 quattro 1.8 1999 4 manual(m5) 4 18 26
9 audi a4 quattro 1.8 1999 4 auto(l5) 4 16 25
10 audi a4 quattro 2.0 2008 4 manual(m6) 4 20 28
# ... with 224 more rows, and 2 more variables: fl <chr>, class <chr>
summary(mpg) manufacturer model displ year
Length:234 Length:234 Min. :1.600 Min. :1999
Class :character Class :character 1st Qu.:2.400 1st Qu.:1999
Mode :character Mode :character Median :3.300 Median :2004
Mean :3.472 Mean :2004
3rd Qu.:4.600 3rd Qu.:2008
Max. :7.000 Max. :2008
cyl trans drv cty
Min. :4.000 Length:234 Length:234 Min. : 9.00
1st Qu.:4.000 Class :character Class :character 1st Qu.:14.00
Median :6.000 Mode :character Mode :character Median :17.00
Mean :5.889 Mean :16.86
3rd Qu.:8.000 3rd Qu.:19.00
Max. :8.000 Max. :35.00
hwy fl class
Min. :12.00 Length:234 Length:234
1st Qu.:18.00 Class :character Class :character
Median :24.00 Mode :character Mode :character
Mean :23.44
3rd Qu.:27.00
Max. :44.00
mpg$drv [1] "f" "f" "f" "f" "f" "f" "f" "4" "4" "4" "4" "4" "4" "4" "4" "4" "4"
[18] "4" "r" "r" "r" "r" "r" "r" "r" "r" "r" "r" "4" "4" "4" "4" "f" "f"
[35] "f" "f" "f" "f" "f" "f" "f" "f" "f" "f" "f" "f" "f" "f" "4" "4" "4"
[52] "4" "4" "4" "4" "4" "4" "4" "4" "4" "4" "4" "4" "4" "4" "4" "4" "4"
[69] "4" "4" "4" "4" "4" "4" "r" "r" "r" "4" "4" "4" "4" "4" "4" "4" "4"
[86] "4" "4" "4" "4" "4" "r" "r" "r" "r" "r"
[ reached getOption("max.print") -- omitted 139 entries ]
ggplot(data = mpg) + geom_point(mapping = aes(x = displ, y = hwy))
ggplot(data = mpg) + geom_smooth(mapping = aes(x = displ, y = hwy)) + geom_point(mapping = aes(x = displ,
y = hwy))
By inserting x and y into ggplot(), all layers will use those mappings unless otherwise specified.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) + geom_smooth() + geom_point()
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) + geom_smooth() + geom_point() +
geom_smooth(mapping = aes(x = displ, y = cty), color = "red")
Note: if you want to map a variable to a feature like color or size, it must go inside aes(); if you just want to set the feature to a fixed value, it goes outside aes().
ggplot(mpg, aes(displ, hwy)) + geom_smooth(color = "green") + geom_point(aes(color = class),
size = 2)
You can also split the plot into subplots based on a variable using facets.
ggplot(mpg, aes(displ, hwy)) + geom_point() + facet_grid(. ~ cyl)
ggplot(mpg, aes(displ, hwy)) + geom_point() + facet_grid(cyl ~ .)
ggplot(mpg, aes(displ, hwy)) + geom_point() + facet_wrap(~cyl)
ggplot(mpg, aes(displ, hwy)) + geom_point() + facet_grid(cyl ~ class)
Play with ggplot, trying to make some of the following plots:
This time we will be using the diamonds dataset
summary(diamonds) carat cut color clarity
Min. :0.2000 Fair : 1610 D: 6775 SI1 :13065
1st Qu.:0.4000 Good : 4906 E: 9797 VS2 :12258
Median :0.7000 Very Good:12082 F: 9542 SI2 : 9194
Mean :0.7979 Premium :13791 G:11292 VS1 : 8171
3rd Qu.:1.0400 Ideal :21551 H: 8304 VVS2 : 5066
Max. :5.0100 I: 5422 VVS1 : 3655
J: 2808 (Other): 2531
depth table price x
Min. :43.00 Min. :43.00 Min. : 326 Min. : 0.000
1st Qu.:61.00 1st Qu.:56.00 1st Qu.: 950 1st Qu.: 4.710
Median :61.80 Median :57.00 Median : 2401 Median : 5.700
Mean :61.75 Mean :57.46 Mean : 3933 Mean : 5.731
3rd Qu.:62.50 3rd Qu.:59.00 3rd Qu.: 5324 3rd Qu.: 6.540
Max. :79.00 Max. :95.00 Max. :18823 Max. :10.740
y z
Min. : 0.000 Min. : 0.000
1st Qu.: 4.720 1st Qu.: 2.910
Median : 5.710 Median : 3.530
Mean : 5.735 Mean : 3.539
3rd Qu.: 6.540 3rd Qu.: 4.040
Max. :58.900 Max. :31.800
ggplot(diamonds, aes(cut)) + geom_bar()
ggplot(diamonds, aes(price)) + geom_histogram(bins = 100)
Bar charts, histograms, and the other plots in the one-variable section of the ggplot2 cheat sheet bin your data based on a single variable.
You can determine the computed variables of a graphic by using the help function:
# Bar chart of proportions instead of counts; group = 1 puts every bar in a
# single group for the computed ..prop.. variable.
ggplot(diamonds) +
  geom_bar(aes(x = cut, y = ..prop.., group = 1))

# The same two-variable bar chart under three position adjustments.
ggplot(diamonds) +
  geom_bar(aes(x = color, fill = cut), position = "dodge")

ggplot(diamonds) +
  geom_bar(aes(x = color, fill = cut), position = "fill")

ggplot(diamonds) +
  geom_bar(aes(x = color, color = cut), position = "stack", fill = NA)

# Scatterplot of the midwest data with a loess smoother; the axis limits
# restrict the plot to area <= 0.1 and poptotal <= 500000.
ggplot(midwest, aes(x = area, y = poptotal)) +
  geom_point(aes(col = state, size = popdensity)) +
  geom_smooth(method = "loess", se = FALSE) +
  xlim(c(0, 0.1)) +
  ylim(c(0, 500000)) +
  labs(
    subtitle = "Area Vs Population",
    y = "Population",
    x = "Area",
    title = "Scatterplot",
    caption = "Source: midwest"
  )

# Diverging bars: standardised mpg per car, sorted, coloured by sign.
# Note: the assignments below create a modified copy of `mtcars` in the
# current environment.
mtcars$`car name` <- rownames(mtcars) # create new column for car names
mtcars$mpg_z <- round((mtcars$mpg - mean(mtcars$mpg)) / sd(mtcars$mpg), 2) # compute normalized mpg
mtcars$mpg_type <- ifelse(mtcars$mpg_z < 0, "below", "above") # above / below avg flag
mtcars <- mtcars[order(mtcars$mpg_z), ] # sort
mtcars$`car name` <- factor(mtcars$`car name`, levels = mtcars$`car name`) # convert to factor to retain sorted order in plot.
ggplot(mtcars, aes(x = `car name`, y = mpg_z, label = mpg_z)) +
  geom_bar(stat = "identity", aes(fill = mpg_type), width = 0.5) +
  scale_fill_manual(
    name = "Mileage",
    labels = c("Above Average", "Below Average"),
    values = c("above" = "#00ba38", "below" = "#f8766d")
  ) +
  labs(subtitle = "Normalised mileage from 'mtcars'", title = "Diverging Bars") +
  coord_flip()
# prep data
# Slope chart: one segment per continent from its 1952 value (x = 1) to its
# 1957 value (x = 2). The data are downloaded from the URL below.
df <- read.csv("https://raw.githubusercontent.com/selva86/datasets/master/gdppercap.csv")
colnames(df) <- c("continent", "1952", "1957")
left_label <- paste(df$continent, round(df$`1952`), sep = ", ")
right_label <- paste(df$continent, round(df$`1957`), sep = ", ")
# "red" when the value fell between the two years, "green" otherwise.
df$class <- ifelse((df$`1957` - df$`1952`) < 0, "red", "green")
# Upper y limit with 10% headroom; also used as the height of the
# "Time 1" / "Time 2" column titles.
slope_y_max <- 1.1 * max(df$`1952`, df$`1957`)

ggplot(df) +
  geom_segment(
    aes(x = 1, xend = 2, y = `1952`, yend = `1957`, col = class),
    size = .75,
    show.legend = FALSE
  ) +
  geom_vline(xintercept = 1, linetype = "dashed", size = .1) +
  geom_vline(xintercept = 2, linetype = "dashed", size = .1) +
  scale_color_manual(
    labels = c("Up", "Down"),
    values = c("green" = "#00ba38", "red" = "#f8766d")
  ) + # color of lines
  labs(x = "", y = "Mean GdpPerCap") + # Axis labels
  xlim(.5, 2.5) +
  ylim(0, slope_y_max) + # X and Y axis limits
  geom_text(label = left_label, y = df$`1952`, x = rep(1, NROW(df)), hjust = 1.1, size = 3.5) +
  geom_text(label = right_label, y = df$`1957`, x = rep(2, NROW(df)), hjust = -0.1, size = 3.5) +
  geom_text(label = "Time 1", x = 1, y = slope_y_max, hjust = 1.2, size = 5) + # title
  geom_text(label = "Time 2", x = 2, y = slope_y_max, hjust = -0.1, size = 5) + # title
  theme(
    panel.background = element_blank(),
    panel.grid = element_blank(),
    axis.ticks = element_blank(),
    axis.text.x = element_blank(),
    panel.border = element_blank(),
    plot.margin = unit(c(1, 2, 1, 2), "cm")
  )

# Density plot of city mileage, one filled curve per cylinder count.
ggplot(mpg, aes(cty)) +
  geom_density(aes(fill = factor(cyl)), alpha = 0.8) +
  labs(
    title = "Density plot",
    subtitle = "City Mileage Grouped by Number of cylinders",
    caption = "Source: mpg",
    x = "City Mileage",
    fill = "# Cylinders"
  )

# Violin plot of city mileage by vehicle class.
ggplot(mpg, aes(class, cty)) +
  geom_violin() +
  labs(
    title = "Violin plot",
    subtitle = "City Mileage vs Class of vehicle",
    caption = "Source: mpg",
    x = "Class of Vehicle",
    y = "City Mileage"
  )

# Make theme_classic() the default theme for the plots that follow.
theme_set(theme_classic())
# Pie chart: a single stacked bar of class frequencies drawn in polar
# coordinates.
df <- as.data.frame(table(mpg$class))
colnames(df) <- c("class", "freq")
ggplot(df, aes(x = "", y = freq, fill = factor(class))) +
  geom_bar(width = 1, stat = "identity") +
  theme(
    axis.line = element_blank(),
    plot.title = element_text(hjust = 0.5)
  ) +
  labs(
    fill = "class",
    x = NULL,
    y = NULL,
    title = "Pie Chart of class",
    caption = "Source: mpg"
  ) +
  coord_polar(theta = "y", start = 0)

# Grid illustration on an EncSz x EncSz board of tiles.
EncSz <- 25 # side length of the grid
SynPermCon <- 0.5 # threshold applied to the random values in `j`
PtPrcnt <- 0.75 # fraction of cells that receive a random value
SPSmpSz <- round(EncSz^2 * PtPrcnt)
# Per-cell value: 0.3 everywhere, 1 on the listed index ranges.
ENC <- rep(.3, EncSz^2)
ENC[c(19:83, 200:250, 353:420, 497:585)] <- 1
# One row per cell; x cycles fastest, y is constant within each grid row.
SPEncBoxes <- tibble(
  x = rep(seq_len(EncSz), EncSz),
  y = rep(seq_len(EncSz), each = EncSz)
)
# Random normal values on a random subset of SPSmpSz cells, NA elsewhere.
# Kept as one statement so the order of the random draws is unchanged.
j <- rep(NA_real_, EncSz^2)
j[sample(EncSz^2, SPSmpSz)] <- rnorm(SPSmpSz, mean = .9 * SynPermCon, sd = SynPermCon / 5)
# j2: 1 where j exceeds the threshold, 2 where in addition ENC == 1,
# NA where j is at or below the threshold or missing.
j2 <- rep(NA, EncSz^2)
j2[j > SynPermCon] <- 1
j2[j > SynPermCon & ENC == 1] <- 2
j2[is.na(j)] <- NA
# EncAct: 1 where j exceeds the threshold, 0.1 otherwise.
EncAct <- rep(0.1, EncSz^2)
EncAct[j > SynPermCon] <- 1
# Bin j into 10 intervals to match the 10 fill colours used below.
j <- cut(j, breaks = c(-Inf, seq(0.4, 0.6, 0.025), Inf))
# Theme that removes axes, legend, grid and margins.
BlnkGrph <- theme(
  axis.line = element_blank(),
  axis.text.x = element_blank(),
  axis.text.y = element_blank(),
  axis.ticks = element_blank(),
  axis.title.x = element_blank(),
  axis.title.y = element_blank(),
  legend.position = "none",
  panel.background = element_blank(),
  panel.border = element_blank(),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  plot.background = element_blank(),
  plot.margin = grid::unit(c(0, 0, 0, 0), "mm")
)

# Tiles filled by round(ENC) (white / blue) with a point on every cell whose
# j2 is not NA (black for 1, green for 2).
SPEncBoxes %>%
  ggplot(aes(x, y, fill = factor(round(ENC)))) +
  geom_tile(color = "gray", show.legend = FALSE) +
  BlnkGrph +
  coord_fixed() +
  geom_point(aes(x, y, color = factor(j2)), shape = 16, na.rm = TRUE, size = 3) +
  scale_fill_manual(values = c("white", "blue")) +
  scale_shape_identity() +
  scale_color_manual(values = c("black", "green"))

# Tiles filled by the binned j values; outline colour and alpha follow EncAct.
SPEncBoxes %>%
  ggplot(aes(x, y, fill = j, color = EncAct)) +
  geom_tile(show.legend = FALSE, size = 0.2, alpha = EncAct) +
  BlnkGrph +
  coord_fixed() +
  scale_color_gradient(low = "gray", high = "black") +
  scale_fill_manual(
    values = c(
      "red", "red", "red", "red", "orangered",
      "orange", "yellow", "lightgreen", "green1", "green1"
    ),
    na.value = "white"
  )

# Basic arithmetic follows the usual operator precedence.
-sqrt(25) + (5 + 3)/4 * 7 - 2^2
# [1] 5
5%/%3 # Integer Division[1] 1
5%%3 # Modulo (remainder after division)[1] 2
5 == 6[1] FALSE
5 != 6[1] TRUE
83 > (25 >= 23)[1] TRUE
5 > 3 & 3 < 2[1] FALSE
5 > 3 | 3 < 2[1] TRUE
1:4[1] 1 2 3 4
c(5, 3, 2, 1) # Creates a vector via concatenation (hence the c)
[1] 5 3 2 1
c(12, 1:4, 6)[1] 12 1 2 3 4 6
seq(from = 1, t = 10, by = 2) # Creates a vector with the given parameters
[1] 1 3 5 7 9
seq(1, 10, 2) # creates the same vector without naming the parameters
[1] 1 3 5 7 9
seq(1, 10) # R uses the default values for any empty parameters [1] 1 2 3 4 5 6 7 8 9 10
seq(to = 10, by = 2)[1] 1 3 5 7 9
seq(by = 2, to = 10)[1] 1 3 5 7 9
c(seq(1, 10, 2), 25, 10)[1] 1 3 5 7 9 25 10
c(seq(1, 10, 2), 25, 10) > 12[1] FALSE FALSE FALSE FALSE FALSE TRUE FALSE
c(seq(1, 10, 2), 25, 10) * 2[1] 2 6 10 14 18 50 20
x = 5 + 3
(x = 5 + 3)[1] 8
x <- 5 + 3
(x <- 5 + 3)[1] 8
y <- x
y[1] 8
x <- 5 + 3 > 2
x[1] TRUE
x <- seq(172, 23, -13)
x [1] 172 159 146 133 120 107 94 81 68 55 42 29
x <- seq(172, 23, -13)
x[1][1] 172
x[c(1, 3)][1] 172 146
x[2:4][1] 159 146 133
x[4:2][1] 133 146 159
x[] [1] 172 159 146 133 120 107 94 81 68 55 42 29
x[-1] [1] 159 146 133 120 107 94 81 68 55 42 29
x[-c(1, 3)] [1] 159 133 120 107 94 81 68 55 42 29
x[x%%2 == 0][1] 172 146 120 94 68 42
y <- x[x%%2 == 0]
y[9] <- 10
y[1] 172 146 120 94 68 42 NA NA 10
x <- 1:20
mean(x)[1] 10.5
max(x)[1] 20
min(x)[1] 1
length(x)[1] 20
range(x)[1] 1 20
prod(x)[1] 2.432902e+18
var(x)[1] 35
log(x) [1] 0.0000000 0.6931472 1.0986123 1.3862944 1.6094379 1.7917595 1.9459101
[8] 2.0794415 2.1972246 2.3025851 2.3978953 2.4849066 2.5649494 2.6390573
[15] 2.7080502 2.7725887 2.8332133 2.8903718 2.9444390 2.9957323
sqrt(x) [1] 1.000000 1.414214 1.732051 2.000000 2.236068 2.449490 2.645751
[8] 2.828427 3.000000 3.162278 3.316625 3.464102 3.605551 3.741657
[15] 3.872983 4.000000 4.123106 4.242641 4.358899 4.472136
Create a vector of 2 through 8 squared:
4, 9, 16, 25, 36, 49, 64
Create a vector of the square roots of the sum of squares of each consecutive pair of numbers from 1 to 100:
sqrt(1^2 + 2^2), sqrt(3^2 + 4^2), sqrt(5^2 + 6^2), … , sqrt(99^2 + 100^2)
Create a vector of the numbers 1 to 100 not divisible by 3 or 5:
1, 2, 4, 7, 8, 11, 13, 14, 16, 17, … , 97, 98
iris Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1 5.1 3.5 1.4 0.2 setosa
2 4.9 3.0 1.4 0.2 setosa
3 4.7 3.2 1.3 0.2 setosa
4 4.6 3.1 1.5 0.2 setosa
5 5.0 3.6 1.4 0.2 setosa
6 5.4 3.9 1.7 0.4 setosa
7 4.6 3.4 1.4 0.3 setosa
8 5.0 3.4 1.5 0.2 setosa
9 4.4 2.9 1.4 0.2 setosa
10 4.9 3.1 1.5 0.1 setosa
11 5.4 3.7 1.5 0.2 setosa
12 4.8 3.4 1.6 0.2 setosa
13 4.8 3.0 1.4 0.1 setosa
14 4.3 3.0 1.1 0.1 setosa
15 5.8 4.0 1.2 0.2 setosa
16 5.7 4.4 1.5 0.4 setosa
17 5.4 3.9 1.3 0.4 setosa
18 5.1 3.5 1.4 0.3 setosa
19 5.7 3.8 1.7 0.3 setosa
[ reached getOption("max.print") -- omitted 131 rows ]
as_tibble(iris) # shows only a few rows as well as the type of data in each column
# A tibble: 150 x 5
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
<dbl> <dbl> <dbl> <dbl> <fctr>
1 5.1 3.5 1.4 0.2 setosa
2 4.9 3.0 1.4 0.2 setosa
3 4.7 3.2 1.3 0.2 setosa
4 4.6 3.1 1.5 0.2 setosa
5 5.0 3.6 1.4 0.2 setosa
6 5.4 3.9 1.7 0.4 setosa
7 4.6 3.4 1.4 0.3 setosa
8 5.0 3.4 1.5 0.2 setosa
9 4.4 2.9 1.4 0.2 setosa
10 4.9 3.1 1.5 0.1 setosa
# ... with 140 more rows
iris$Spec [1] setosa setosa setosa setosa setosa setosa
[7] setosa setosa setosa setosa setosa setosa
[13] setosa setosa setosa setosa setosa setosa
[19] setosa setosa setosa setosa setosa setosa
[25] setosa setosa setosa setosa setosa setosa
[31] setosa setosa setosa setosa setosa setosa
[37] setosa setosa setosa setosa setosa setosa
[43] setosa setosa setosa setosa setosa setosa
[49] setosa setosa versicolor versicolor versicolor versicolor
[55] versicolor versicolor versicolor versicolor versicolor versicolor
[61] versicolor versicolor versicolor versicolor versicolor versicolor
[67] versicolor versicolor versicolor versicolor versicolor versicolor
[73] versicolor versicolor versicolor versicolor versicolor versicolor
[79] versicolor versicolor versicolor versicolor versicolor versicolor
[85] versicolor versicolor versicolor versicolor versicolor versicolor
[91] versicolor versicolor versicolor versicolor versicolor
[ reached getOption("max.print") -- omitted 55 entries ]
Levels: setosa versicolor virginica
as_tibble(iris)$Spec
NULL
iris[1] Sepal.Length
1 5.1
2 4.9
3 4.7
4 4.6
5 5.0
6 5.4
7 4.6
8 5.0
9 4.4
10 4.9
11 5.4
12 4.8
13 4.8
14 4.3
15 5.8
16 5.7
17 5.4
18 5.1
19 5.7
20 5.1
21 5.4
22 5.1
23 4.6
24 5.1
25 4.8
26 5.0
27 5.0
28 5.2
29 5.2
30 4.7
31 4.8
32 5.4
33 5.2
34 5.5
35 4.9
36 5.0
37 5.5
38 4.9
39 4.4
40 5.1
41 5.0
42 4.5
43 4.4
44 5.0
45 5.1
46 4.8
47 5.1
48 4.6
49 5.3
50 5.0
51 7.0
52 6.4
53 6.9
54 5.5
55 6.5
56 5.7
57 6.3
58 4.9
59 6.6
60 5.2
61 5.0
62 5.9
63 6.0
64 6.1
65 5.6
66 6.7
67 5.6
68 5.8
69 6.2
70 5.6
71 5.9
72 6.1
73 6.3
74 6.1
75 6.4
76 6.6
77 6.8
78 6.7
79 6.0
80 5.7
81 5.5
82 5.5
83 5.8
84 6.0
85 5.4
86 6.0
87 6.7
88 6.3
89 5.6
90 5.5
91 5.5
92 6.1
93 5.8
94 5.0
95 5.6
[ reached getOption("max.print") -- omitted 55 rows ]
iris[, 1] [1] 5.1 4.9 4.7 4.6 5.0 5.4 4.6 5.0 4.4 4.9 5.4 4.8 4.8 4.3 5.8 5.7 5.4
[18] 5.1 5.7 5.1 5.4 5.1 4.6 5.1 4.8 5.0 5.0 5.2 5.2 4.7 4.8 5.4 5.2 5.5
[35] 4.9 5.0 5.5 4.9 4.4 5.1 5.0 4.5 4.4 5.0 5.1 4.8 5.1 4.6 5.3 5.0 7.0
[52] 6.4 6.9 5.5 6.5 5.7 6.3 4.9 6.6 5.2 5.0 5.9 6.0 6.1 5.6 6.7 5.6 5.8
[69] 6.2 5.6 5.9 6.1 6.3 6.1 6.4 6.6 6.8 6.7 6.0 5.7 5.5 5.5 5.8 6.0 5.4
[86] 6.0 6.7 6.3 5.6 5.5 5.5 6.1 5.8 5.0 5.6
[ reached getOption("max.print") -- omitted 55 entries ]
as_tibble(iris)[1]# A tibble: 150 x 1
Sepal.Length
<dbl>
1 5.1
2 4.9
3 4.7
4 4.6
5 5.0
6 5.4
7 4.6
8 5.0
9 4.4
10 4.9
# ... with 140 more rows
as_tibble(iris)[, 1]# A tibble: 150 x 1
Sepal.Length
<dbl>
1 5.1
2 4.9
3 4.7
4 4.6
5 5.0
6 5.4
7 4.6
8 5.0
9 4.4
10 4.9
# ... with 140 more rows
as_tibble(iris)[[1]] [1] 5.1 4.9 4.7 4.6 5.0 5.4 4.6 5.0 4.4 4.9 5.4 4.8 4.8 4.3 5.8 5.7 5.4
[18] 5.1 5.7 5.1 5.4 5.1 4.6 5.1 4.8 5.0 5.0 5.2 5.2 4.7 4.8 5.4 5.2 5.5
[35] 4.9 5.0 5.5 4.9 4.4 5.1 5.0 4.5 4.4 5.0 5.1 4.8 5.1 4.6 5.3 5.0 7.0
[52] 6.4 6.9 5.5 6.5 5.7 6.3 4.9 6.6 5.2 5.0 5.9 6.0 6.1 5.6 6.7 5.6 5.8
[69] 6.2 5.6 5.9 6.1 6.3 6.1 6.4 6.6 6.8 6.7 6.0 5.7 5.5 5.5 5.8 6.0 5.4
[86] 6.0 6.7 6.3 5.6 5.5 5.5 6.1 5.8 5.0 5.6
[ reached getOption("max.print") -- omitted 55 entries ]
install.packages("nycflights13")
library(nycflights13)
library(nycflights13)
flights # On-time data for all flights that departed NYC (i.e. JFK, LGA or EWR) in 2013# A tibble: 336,776 x 19
year month day dep_time sched_dep_time dep_delay arr_time
<int> <int> <int> <int> <int> <dbl> <int>
1 2013 1 1 517 515 2 830
2 2013 1 1 533 529 4 850
3 2013 1 1 542 540 2 923
4 2013 1 1 544 545 -1 1004
5 2013 1 1 554 600 -6 812
6 2013 1 1 554 558 -4 740
7 2013 1 1 555 600 -5 913
8 2013 1 1 557 600 -3 709
9 2013 1 1 557 600 -3 838
10 2013 1 1 558 600 -2 753
# ... with 336,766 more rows, and 12 more variables: sched_arr_time <int>,
# arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
# origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
# minute <dbl>, time_hour <dttm>
summary(flights) year month day dep_time
Min. :2013 Min. : 1.000 Min. : 1.00 Min. : 1
1st Qu.:2013 1st Qu.: 4.000 1st Qu.: 8.00 1st Qu.: 907
Median :2013 Median : 7.000 Median :16.00 Median :1401
Mean :2013 Mean : 6.549 Mean :15.71 Mean :1349
3rd Qu.:2013 3rd Qu.:10.000 3rd Qu.:23.00 3rd Qu.:1744
sched_dep_time dep_delay arr_time sched_arr_time
Min. : 106 Min. : -43.00 Min. : 1 Min. : 1
1st Qu.: 906 1st Qu.: -5.00 1st Qu.:1104 1st Qu.:1124
Median :1359 Median : -2.00 Median :1535 Median :1556
Mean :1344 Mean : 12.64 Mean :1502 Mean :1536
3rd Qu.:1729 3rd Qu.: 11.00 3rd Qu.:1940 3rd Qu.:1945
arr_delay carrier flight tailnum
Min. : -86.000 Length:336776 Min. : 1 Length:336776
1st Qu.: -17.000 Class :character 1st Qu.: 553 Class :character
Median : -5.000 Mode :character Median :1496 Mode :character
Mean : 6.895 Mean :1972
3rd Qu.: 14.000 3rd Qu.:3465
origin dest air_time distance
Length:336776 Length:336776 Min. : 20.0 Min. : 17
Class :character Class :character 1st Qu.: 82.0 1st Qu.: 502
Mode :character Mode :character Median :129.0 Median : 872
Mean :150.7 Mean :1040
3rd Qu.:192.0 3rd Qu.:1389
hour minute time_hour
Min. : 1.00 Min. : 0.00 Min. :2013-01-01 05:00:00
1st Qu.: 9.00 1st Qu.: 8.00 1st Qu.:2013-04-04 13:00:00
Median :13.00 Median :29.00 Median :2013-07-03 10:00:00
Mean :13.18 Mean :26.23 Mean :2013-07-03 05:02:36
3rd Qu.:17.00 3rd Qu.:44.00 3rd Qu.:2013-10-01 07:00:00
[ reached getOption("max.print") -- omitted 2 rows ]
int integers
dbl doubles or real numbers
chr character vectors (strings)
dttm date-time
date date
lgl logical (TRUE or FALSE)
fctr factors (categorical variables with fixed possible values, e.g., dropdown list)
flights[c("dep_time", "tailnum", "air_time", "time_hour")]
# A tibble: 336,776 x 4
dep_time tailnum air_time time_hour
<int> <chr> <dbl> <dttm>
1 517 N14228 227 2013-01-01 05:00:00
2 533 N24211 227 2013-01-01 05:00:00
3 542 N619AA 160 2013-01-01 05:00:00
4 544 N804JB 183 2013-01-01 05:00:00
5 554 N668DN 116 2013-01-01 06:00:00
6 554 N39463 150 2013-01-01 05:00:00
7 555 N516JB 158 2013-01-01 06:00:00
8 557 N829AS 53 2013-01-01 06:00:00
9 557 N593JB 140 2013-01-01 06:00:00
10 558 N3ALAA 138 2013-01-01 06:00:00
# ... with 336,766 more rows